Skip to content

dash_charts.utils_json_cache⚓︎

Helpers for managing a generic JSON data file cache. Can be used to reduce API calls, etc.

Full dataset documentation: https://dataset.readthedocs.io/en/latest/api.html

View Source
"""Helpers for managing a generic JSON data file cache. Can be used to reduce API calls, etc.

Full dataset documentation: https://dataset.readthedocs.io/en/latest/api.html

"""

import json
import time
from pathlib import Path

from .utils_data import uniq_table_id, write_pretty_json
from .utils_dataset import DBConnect

#  FIXME: Add versioning to the cache directory with semver logic: https://pypi.org/project/semantic-version/

# Default cache folder: a `local_cache` directory next to this module file.
# NOTE(review): the "Kitsu API" wording in the description below looks inherited from another project,
#   while the module docstring describes a generic JSON cache - confirm and reword.
CACHE_DIR = Path(__file__).parent / 'local_cache'
"""Path to folder with all downloaded responses from Kitsu API."""

# Constructed at import time with a database path inside CACHE_DIR.
# NOTE(review): whether this creates the folder/database file on import depends on `DBConnect` - confirm.
FILE_DATA = DBConnect(CACHE_DIR / '_file_lookup_database.db')
"""Global instance of the DBConnect() for the file lookup database."""

# Name of the table that the functions below pass to `load_table` / `create_table`.
CACHE_TABLE_NAME = 'files'
"""Table name containing the cache file information."""

# Column names shared by the insert/find queries in this module.
ID_KEY = 'identifier'
"""Name of the SQLite column containing the unique identifier."""
TS_KEY = 'timestamp'
"""Name of the SQLite column containing the timestamp."""
FILENAME_KEY = 'filename'
"""Name of the SQLite column containing the string filename."""

# TODO: Enable versioning of data and automatic deletion when the version changes
# Not referenced by any function visible in this module yet (see the TODO above).
DATA_VERSION_KEY = 'data_version'
"""Key to indicate the data version."""


def get_files_table(db_instance):
    """Load the file lookup table from the cache database.

    Args:
        db_instance: Connected Database file with `DBConnect()`.

    Returns:
        table: Dataset table named `CACHE_TABLE_NAME` used for the files lookup

    """
    database = db_instance.db
    return database.load_table(CACHE_TABLE_NAME)


def initialize_cache(db_instance):
    """Ensure that the files table exists and remove records whose cached file was manually removed.

    Note that this function only touches the database; it does not create the cache directory.

    Args:
        db_instance: Connected Database file with `DBConnect()`.

    """
    table = db_instance.db.create_table(CACHE_TABLE_NAME)

    # Collect the stale filenames first so that the table is not modified while iterating over it
    removed_files = [row[FILENAME_KEY] for row in table if not Path(row[FILENAME_KEY]).is_file()]

    for filename in removed_files:
        # Filter on the shared column-name constant rather than a hard-coded `filename` keyword
        table.delete(**{FILENAME_KEY: filename})


def get_cache_dict(db_instance):
    """Build a lookup dictionary of every tracked file keyed by its identifier.

    Args:
        db_instance: Connected Database file with `DBConnect()`.

    Returns:
        dict: dictionary `{identifier: path}` where each path is a `pathlib.Path`

    """
    lookup = {}
    for record in get_files_table(db_instance):
        # A later row with the same identifier replaces the earlier one
        lookup[record[ID_KEY]] = Path(record[FILENAME_KEY])
    return lookup


def match_identifier_in_cache(identifier, db_instance):
    """Find every row in the file database whose identifier column equals the given identifier.

    Args:
        identifier: identifier to use as a reference if the corresponding data is already cached
        db_instance: Connected Database file with `DBConnect()`.

    Returns:
        list: list of matching rows with keys of the SQL table. Empty if there is no match

    """
    files_table = get_files_table(db_instance)
    # The column name is held in a constant, so the filter is passed as an unpacked dictionary
    return list(files_table.find(**{ID_KEY: identifier}))


def store_cache_as_file(prefix, identifier, db_instance, cache_dir=CACHE_DIR, suffix='.json'):
    """Register a new cache file in the database and return its path without writing the file.

    Args:
        prefix: string used to create more recognizable filenames
        identifier: identifier to use as a reference if the corresponding data is already cached
        db_instance: Connected Database file with `DBConnect()`.
        cache_dir: path to the directory to store the file. Default is `CACHE_DIR`
        suffix: string filename suffix. The default is `.json`

    Returns:
        Path: to the cached file. Caller needs to write to the file

    Raises:
        RuntimeError: if duplicate match found when storing

    """
    # Refuse to register the same identifier twice
    duplicates = match_identifier_in_cache(identifier, db_instance)
    if duplicates:
        raise RuntimeError(f'Already have an entry for this identifier (`{identifier}`): {duplicates}')

    # Build a unique filename, then record it (as a string) along with the identifier and current time
    cache_path = cache_dir / f'{prefix}_{uniq_table_id()}{suffix}'
    record = {
        FILENAME_KEY: str(cache_path),
        ID_KEY: identifier,
        TS_KEY: time.time(),
    }
    files_table = get_files_table(db_instance)
    files_table.insert(record)
    return cache_path


def store_cache_object(prefix, identifier, obj, db_instance, cache_dir=CACHE_DIR):
    """Store the object as a JSON file and track in a SQLite database to prevent duplicates.

    Args:
        prefix: string used to create more recognizable filenames
        identifier: identifier to use as a reference if the corresponding data is already cached
        obj: JSON object to write
        db_instance: Connected Database file with `DBConnect()`.
        cache_dir: path to the directory to store the file. Default is `CACHE_DIR`

    Raises:
        RuntimeError: if duplicate match found when storing
        Exception: any error raised while writing the file is re-raised after the record is removed

    """
    filename = store_cache_as_file(prefix, identifier, db_instance, cache_dir)
    try:
        write_pretty_json(filename, obj)
    except Exception:
        # If writing the file fails, ensure that the record is removed from the database.
        # The row was inserted with `str(filename)`, so the filter must use the same string form;
        # passing the `Path` object would not match the stored value (or fail to bind as a parameter)
        get_files_table(db_instance).delete(**{FILENAME_KEY: str(filename)})
        raise


def retrieve_cache_fn(identifier, db_instance):
    """Look up the path of the cached file registered for the identifier.

    Args:
        identifier: identifier to use as a reference if the corresponding data is already cached
        db_instance: Connected Database file with `DBConnect()`.

    Returns:
        Path: to the cached file. Caller needs to read the file

    Raises:
        RuntimeError: if not exactly one match found

    """
    found = match_identifier_in_cache(identifier, db_instance)
    if len(found) == 1:
        return Path(found[0][FILENAME_KEY])
    # Zero matches (not cached) and multiple matches (ambiguous) are both reported as errors
    raise RuntimeError(f'Did not find exactly one entry for this identifier (`{identifier}`): {found}')


def retrieve_cache_object(identifier, db_instance):
    """Read and decode the JSON file that is cached for the identifier.

    Args:
        identifier: identifier to use as a reference if the corresponding data is already cached
        db_instance: Connected Database file with `DBConnect()`.

    Returns:
        dict: object stored in the cache, as decoded by `json.loads`

    """
    cache_path = retrieve_cache_fn(identifier, db_instance)
    raw_text = cache_path.read_text()
    return json.loads(raw_text)

Variables⚓︎

CACHE_DIR

Path to folder with all downloaded responses from Kitsu API.

CACHE_TABLE_NAME

Table name containing the cache file information.

DATA_VERSION_KEY

Key to indicate the data version.

FILENAME_KEY

Name of the SQLite column containing the string filename.

FILE_DATA

Global instance of the DBConnect() for the file lookup database.

ID_KEY

Name of the SQLite column containing the unique identifier.

TS_KEY

Name of the SQLite column containing the timestamp.

Functions⚓︎

get_cache_dict⚓︎

def get_cache_dict(
    db_instance
)

Return a dictionary {identifier: path} keys and values.

Parameters:

Name Description
db_instance Connected Database file with DBConnect().

Returns:

Type Description
dict dictionary {identifier: path} keys and values
View Source
def get_cache_dict(db_instance):
    """Build a lookup dictionary of every tracked file keyed by its identifier.

    Args:
        db_instance: Connected Database file with `DBConnect()`.

    Returns:
        dict: dictionary `{identifier: path}` where each path is a `pathlib.Path`

    """
    lookup = {}
    for record in get_files_table(db_instance):
        # A later row with the same identifier replaces the earlier one
        lookup[record[ID_KEY]] = Path(record[FILENAME_KEY])
    return lookup

get_files_table⚓︎

def get_files_table(
    db_instance
)

Retrieve the files lookup table from the cache database.

Parameters:

Name Description
db_instance Connected Database file with DBConnect().

Returns:

Type Description
table Dataset table for the files lookup
View Source
def get_files_table(db_instance):
    """Load the file lookup table from the cache database.

    Args:
        db_instance: Connected Database file with `DBConnect()`.

    Returns:
        table: Dataset table named `CACHE_TABLE_NAME` used for the files lookup

    """
    database = db_instance.db
    return database.load_table(CACHE_TABLE_NAME)

initialize_cache⚓︎

def initialize_cache(
    db_instance
)

Ensure that the files table exists in the database. Remove file records from the database if the files were manually removed.

Parameters:

Name Description
db_instance Connected Database file with DBConnect().
View Source
def initialize_cache(db_instance):
    """Ensure that the files table exists and remove records whose cached file was manually removed.

    Note that this function only touches the database; it does not create the cache directory.

    Args:
        db_instance: Connected Database file with `DBConnect()`.

    """
    table = db_instance.db.create_table(CACHE_TABLE_NAME)

    # Collect the stale filenames first so that the table is not modified while iterating over it
    removed_files = [row[FILENAME_KEY] for row in table if not Path(row[FILENAME_KEY]).is_file()]

    for filename in removed_files:
        # Filter on the shared column-name constant rather than a hard-coded `filename` keyword
        table.delete(**{FILENAME_KEY: filename})

match_identifier_in_cache⚓︎

def match_identifier_in_cache(
    identifier,
    db_instance
)

Return list of matches for the given identifier in the file database.

Parameters:

Name Description
identifier identifier to use as a reference if the corresponding data is already cached
db_instance Connected Database file with DBConnect().

Returns:

Type Description
list list of match object with keys of the SQL table
View Source
def match_identifier_in_cache(identifier, db_instance):
    """Find every row in the file database whose identifier column equals the given identifier.

    Args:
        identifier: identifier to use as a reference if the corresponding data is already cached
        db_instance: Connected Database file with `DBConnect()`.

    Returns:
        list: list of matching rows with keys of the SQL table. Empty if there is no match

    """
    files_table = get_files_table(db_instance)
    # The column name is held in a constant, so the filter is passed as an unpacked dictionary
    return list(files_table.find(**{ID_KEY: identifier}))

retrieve_cache_fn⚓︎

def retrieve_cache_fn(
    identifier,
    db_instance
)

Retrieve the path to the cached file for the given identifier.

Parameters:

Name Description
identifier identifier to use as a reference if the corresponding data is already cached
db_instance Connected Database file with DBConnect().

Returns:

Type Description
Path to the cached file. Caller needs to read the file

Raises:

Type Description
RuntimeError if not exactly one match found
View Source
def retrieve_cache_fn(identifier, db_instance):
    """Look up the path of the cached file registered for the identifier.

    Args:
        identifier: identifier to use as a reference if the corresponding data is already cached
        db_instance: Connected Database file with `DBConnect()`.

    Returns:
        Path: to the cached file. Caller needs to read the file

    Raises:
        RuntimeError: if not exactly one match found

    """
    found = match_identifier_in_cache(identifier, db_instance)
    if len(found) == 1:
        return Path(found[0][FILENAME_KEY])
    # Zero matches (not cached) and multiple matches (ambiguous) are both reported as errors
    raise RuntimeError(f'Did not find exactly one entry for this identifier (`{identifier}`): {found}')

retrieve_cache_object⚓︎

def retrieve_cache_object(
    identifier,
    db_instance
)

Retrieve stored object from cache database.

Parameters:

Name Description
identifier identifier to use as a reference if the corresponding data is already cached
db_instance Connected Database file with DBConnect().

Returns:

Type Description
dict object stored in the cache
View Source
def retrieve_cache_object(identifier, db_instance):
    """Read and decode the JSON file that is cached for the identifier.

    Args:
        identifier: identifier to use as a reference if the corresponding data is already cached
        db_instance: Connected Database file with `DBConnect()`.

    Returns:
        dict: object stored in the cache, as decoded by `json.loads`

    """
    cache_path = retrieve_cache_fn(identifier, db_instance)
    raw_text = cache_path.read_text()
    return json.loads(raw_text)

store_cache_as_file⚓︎

def store_cache_as_file(
    prefix,
    identifier,
    db_instance,
    cache_dir=CACHE_DIR,
    suffix='.json'
)

Store the reference in the cache database and return the file so the user can handle saving the file.

Parameters:

Name Description
prefix string used to create more recognizable filenames
identifier identifier to use as a reference if the corresponding data is already cached
db_instance Connected Database file with DBConnect().
cache_dir path to the directory to store the file. Default is CACHE_DIR
suffix string filename suffix. The default is .json

Returns:

Type Description
Path to the cached file. Caller needs to write to the file

Raises:

Type Description
RuntimeError if duplicate match found when storing
View Source
def store_cache_as_file(prefix, identifier, db_instance, cache_dir=CACHE_DIR, suffix='.json'):
    """Register a new cache file in the database and return its path without writing the file.

    Args:
        prefix: string used to create more recognizable filenames
        identifier: identifier to use as a reference if the corresponding data is already cached
        db_instance: Connected Database file with `DBConnect()`.
        cache_dir: path to the directory to store the file. Default is `CACHE_DIR`
        suffix: string filename suffix. The default is `.json`

    Returns:
        Path: to the cached file. Caller needs to write to the file

    Raises:
        RuntimeError: if duplicate match found when storing

    """
    # Refuse to register the same identifier twice
    duplicates = match_identifier_in_cache(identifier, db_instance)
    if duplicates:
        raise RuntimeError(f'Already have an entry for this identifier (`{identifier}`): {duplicates}')

    # Build a unique filename, then record it (as a string) along with the identifier and current time
    cache_path = cache_dir / f'{prefix}_{uniq_table_id()}{suffix}'
    record = {
        FILENAME_KEY: str(cache_path),
        ID_KEY: identifier,
        TS_KEY: time.time(),
    }
    files_table = get_files_table(db_instance)
    files_table.insert(record)
    return cache_path

store_cache_object⚓︎

def store_cache_object(
    prefix,
    identifier,
    obj,
    db_instance,
    cache_dir=CACHE_DIR
)

Store the object as a JSON file and track in a SQLite database to prevent duplicates.

Parameters:

Name Description
prefix string used to create more recognizable filenames
identifier identifier to use as a reference if the corresponding data is already cached
obj JSON object to write
db_instance Connected Database file with DBConnect().
cache_dir path to the directory to store the file. Default is CACHE_DIR

Raises:

Type Description
RuntimeError if duplicate match found when storing
View Source
def store_cache_object(prefix, identifier, obj, db_instance, cache_dir=CACHE_DIR):
    """Store the object as a JSON file and track in a SQLite database to prevent duplicates.

    Args:
        prefix: string used to create more recognizable filenames
        identifier: identifier to use as a reference if the corresponding data is already cached
        obj: JSON object to write
        db_instance: Connected Database file with `DBConnect()`.
        cache_dir: path to the directory to store the file. Default is `CACHE_DIR`

    Raises:
        RuntimeError: if duplicate match found when storing
        Exception: any error raised while writing the file is re-raised after the record is removed

    """
    filename = store_cache_as_file(prefix, identifier, db_instance, cache_dir)
    try:
        write_pretty_json(filename, obj)
    except Exception:
        # If writing the file fails, ensure that the record is removed from the database.
        # The row was inserted with `str(filename)`, so the filter must use the same string form;
        # passing the `Path` object would not match the stored value (or fail to bind as a parameter)
        get_files_table(db_instance).delete(**{FILENAME_KEY: str(filename)})
        raise

Last update: August 5, 2022
Created: August 5, 2022